#!/usr/bin/env python
# -*- encoding: utf-8 -*-
# Project: spd-sxmcc
"""
@author: lyndon
@time Created on 2018/12/24 13:59
@desc
"""

import requests
from bs4 import BeautifulSoup

user_agent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/51.0.2704.103 Safari/537.36"
headers = {"User-Agent": user_agent}  # request headers; requests expects a dict
url = "http://ty.5i5j.com/exchange/n{}"  # Taiyuan listings; {} is filled with the page number
apartments = []  # one dict per scraped listing, in page order


def _trim_unit(text, unit_chars):
    """Return ``text`` without surrounding whitespace or trailing unit characters.

    ``unit_chars`` is treated as a *set of characters* (``str.rstrip``
    semantics), not as a literal suffix. Whitespace is removed first so that
    a unit followed by trailing whitespace is still stripped.
    """
    return text.strip().rstrip(unit_chars).strip()


def _to_number(text):
    """Parse ``text`` as an int, falling back to float (e.g. for "85.5").

    Raises:
        ValueError: if ``text`` is neither an integer nor a float literal.
    """
    try:
        return int(text)
    except ValueError:
        return float(text)


for i in range(1, 2):  # currently only page 1 is fetched
    # A timeout keeps the script from hanging forever on an unresponsive server.
    res = requests.get(url.format(i), headers=headers, timeout=30)
    # Fail loudly on an HTTP error instead of silently parsing an error page.
    res.raise_for_status()
    res.encoding = "utf-8"  # force the decoding so res.text is not garbled
    # Debug output: response object, its type, and the raw page text.
    print(res)
    print(type(res))
    print(res.text)
    print("===========================================")
    soup = BeautifulSoup(res.text, "lxml")
    lis = soup.select('.list-info')
    for li in lis:
        # The community link supplies both the name and the numeric id,
        # so it is looked up once and reused.
        community_link = li.select("ul.list-info-l a[href*='community']")[0]
        apartment = {
            # Listing id: last path segment of the first link containing 'exchange'.
            'id': int(li.select("a[href*='exchange']")[0]['href'].split('/')[-1].strip()),
            # Community (residential compound) name.
            'community': community_link.text.strip(),
            # Community id: last path segment of the community link.
            'communityId': int(community_link['href'].split('/')[-1].strip()),
            # Address extraction is disabled; the original line is kept for reference:
            # apartment['adress']=li.select('ul.list-info-l a:nth-of-type(2)')[0].text.strip()
            # Layout / room type. NOTE(review): 'font-balck' is spelled as in the
            # original selector; presumably it matches the site's markup - verify
            # before "correcting" it.
            'type': li.select('li.font-balck span:nth-of-type(1)')[0].text.strip(),
            # Unit price, with the trailing '元/平米' (yuan per square metre) removed.
            'price': _to_number(_trim_unit(li.select('.list-info-r p')[0].text, '元/平米')),
            # Total price, with the trailing '万元' (ten-thousand yuan) removed.
            'totalPrice': _to_number(_trim_unit(li.select('div.list-info-r h3')[0].text, '万元')),
        }
        apartments.append(apartment)
import pandas as pd

# Tabulate the scraped listings: each dict becomes a row and its keys become
# the columns.
df = pd.DataFrame(data=apartments)
# Write the table to an Excel workbook in the current working directory.
df.to_excel(excel_writer="taiyuanershoufang.xlsx")
